The data are from a 2017 FreeCodeCamp survey aimed at new coders with fewer than five years of experience. It includes respondents who have attempted, completed, or paid for bootcamps or other lessons. The data presented here are limited to the 10 countries with the most survey respondents (USA, India, Canada, UK, Brazil, Germany, Poland, Russia, Australia and France).
---
title: "FreeCodeCamp 2017 Survey Summary"
# The dashboard options must be nested under the output format; left flat,
# they are parsed as unrelated top-level keys and the format is ignored.
output:
  flexdashboard::flex_dashboard:
    orientation: columns
    vertical_layout: fill
    source_code: embed
---
```{r setup, include=FALSE}
library(flexdashboard)
library(tidyverse)
library(plotly)
```
Column {data-width=350}
-----------------------------------------------------------------------
### Summary
The data are from a 2017 FreeCodeCamp survey aimed at new coders with fewer than five years of experience. It includes respondents who have attempted, completed, or paid for bootcamps or other lessons. The data presented here are limited to the 10 countries with the most survey respondents (USA, India, Canada, UK, Brazil, Germany, Poland, Russia, Australia and France).
### Gender Distribution
```{r, fig.height = 10, fig.width=10}
# Load only the survey columns this dashboard uses; cols_only() drops the rest.
# `survey_df` and `countries` are reused by the later chunks.
survey_df <- read_csv(
  "2017-fCC-New-Coders-Survey-Data.csv",
  col_types = cols_only(
    "Age" = col_double(),
    "AttendedBootcamp" = col_integer(),
    "BootcampName" = "c",
    "BootcampFinish" = "c",
    "CountryLive" = "c",
    "Gender" = "c",
    "MoneyForLearning" = col_double(),
    "MonthsProgramming" = col_double()
  )
)

# MoneyForLearning describes, in USD, the amount of money spent by
# participants from the moment they started coding until they completed
# the survey.

# Limit the analysis to the 10 countries with the most survey responses.
# The list was obtained with:
# df <- survey_df %>% group_by(CountryLive) %>% count() %>% arrange(desc(n))
countries <- c(
  "United States of America", "India", "United Kingdom", "Canada", "Brazil",
  "Germany", "Poland", "Russia", "Australia", "France"
)

# Respondent count and share per gender (rows with a missing Gender dropped).
gender_df <- survey_df %>%
  filter(CountryLive %in% countries, !is.na(Gender)) %>%
  count(Gender)
total_counts <- sum(gender_df$n)
gender_df <- gender_df %>%
  mutate(percent = n / total_counts * 100)

# Bare theme for the pie chart: no axis titles, grid, border or ticks.
blank_theme <- theme_minimal() +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.border = element_blank(),
    panel.grid = element_blank(),
    axis.ticks = element_blank()
  )

# Build one legend label per gender directly from the data instead of
# indexing rows by position, so a missing or extra category cannot shift the
# percentages onto the wrong label. Names let ggplot match labels to breaks.
# NOTE(review): assumes the survey encodes this category as "genderqueer";
# it is displayed as "gender queer". Other values are shown unchanged.
gender_names <- sub("^genderqueer$", "gender queer", gender_df$Gender)
gender_labels <- setNames(
  paste(gender_names, round(gender_df$percent, 2), "%", sep = " "),
  gender_df$Gender
)

# Pie chart: a single stacked bar drawn in polar coordinates.
ggplot(gender_df, aes(x = "", y = percent, fill = Gender)) +
  geom_bar(width = 1, stat = "identity") +
  coord_polar("y", start = 0) +
  blank_theme +
  scale_fill_brewer(palette = "Paired", labels = gender_labels) +
  theme(axis.text.x = element_blank()) +
  guides(fill = guide_legend(nrow = 2, byrow = FALSE, title = "")) +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.text = element_text(size = 25),
    legend.position = "bottom"
  )
```
Column {data-width=550}
-----------------------------------------------------------------------
### Top 5 Completed Bootcamps
```{r, fig.height = 15, fig.width=10}
# Which bootcamps were most often completed?
# The panel is titled "Completed", so require BootcampFinish as well as
# AttendedBootcamp. BootcampFinish is read as character, hence the coercion;
# rows where it is missing are dropped by filter().
# NOTE(review): assumes BootcampFinish is coded 1 = finished - confirm
# against the survey codebook.
bootcamp_df <- survey_df %>%
  filter(
    AttendedBootcamp == 1,
    as.numeric(BootcampFinish) == 1,
    !is.na(BootcampName),
    CountryLive %in% countries
  ) %>%
  count(BootcampName, sort = TRUE) %>%
  head(n = 5)

# Bar chart of the five most frequent bootcamps, with the count above each bar.
ggplot(bootcamp_df, aes(x = BootcampName, y = n, fill = BootcampName)) +
  geom_bar(stat = "identity") +
  ylab("Number of People") +
  scale_fill_brewer(palette = "Set2") +
  xlab("") +
  geom_text(
    aes(label = n),
    position = position_dodge(width = 0.9),
    vjust = -0.25,
    size = 8
  ) +
  theme(
    # "none" is the documented value for hiding the legend ("None" is not).
    legend.position = "none",
    text = element_text(size = 20),
    axis.text.x = element_text(angle = 35),
    panel.background = element_blank()
  )
```
Column {data-width=550}
-----------------------------------------------------------------------
### Median Amount Spent per Month Programming (USD)
```{r, fig.height = 7, fig.width=15}
# Treat missing months/money as 0 so the "> 0" filters below exclude them.
survey_df$MonthsProgramming[is.na(survey_df$MonthsProgramming)] <- 0
survey_df$MoneyForLearning[is.na(survey_df$MoneyForLearning)] <- 0

# Filter out extreme outliers: limit total money spent to $20000.
# Requiring MonthsProgramming > 0 also avoids dividing by zero.
# `learning_money_df` is reused by the Age Distribution chunk.
learning_money_df <- survey_df %>%
  filter(
    CountryLive %in% countries,
    MoneyForLearning > 0,
    MoneyForLearning <= 20000,
    MonthsProgramming > 0
  ) %>%
  mutate(money_per_month = MoneyForLearning / MonthsProgramming)

# Median monthly spend per country.
learning_money <- learning_money_df %>%
  group_by(CountryLive) %>%
  summarize(med_money_spent = median(money_per_month, na.rm = TRUE)) %>%
  arrange(desc(med_money_spent))

# ggplot orders a character axis alphabetically regardless of row order, so
# the bars are ordered explicitly with reorder(); after coord_flip() the
# highest median is at the top. The `text` aesthetic is not drawn by ggplot;
# it only feeds the plotly tooltip below.
p <- ggplot(
  learning_money,
  aes(x = reorder(CountryLive, med_money_spent), y = med_money_spent)
) +
  geom_bar(
    stat = "identity",
    fill = "steelblue",
    aes(text = paste("Median Spent: $", round(med_money_spent, 2)))
  ) +
  ylab("USD") +
  coord_flip() +
  xlab("") +
  theme(
    # "none" is the documented value for hiding the legend ("None" is not).
    legend.position = "none",
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 10),
    panel.background = element_blank()
  )

ggplotly(p, tooltip = "text")
```
### Age Distribution
```{r, fig.height = 7, fig.width = 15}
# Recode missing ages as 0, then keep only rows with a positive age.
learning_money_df <- learning_money_df %>%
  mutate(Age = replace_na(Age, 0))
age_df <- learning_money_df %>%
  filter(Age > 0)

# 30-bin histogram of respondent ages.
p <- ggplot(age_df, aes(x = Age)) +
  geom_histogram(bins = 30, color = "black", fill = "grey")

# Read the computed bins back from the built plot: the tallest bin's `x`
# value is where the red marker line is drawn.
hist_bins <- ggplot_build(p)$data[[1]]
most_common_age_idx <- which.max(hist_bins$count)
most_common_age <- hist_bins$x[most_common_age_idx]

p <- p +
  geom_vline(xintercept = most_common_age, color = "red") +
  xlab("Age") +
  ylab("") +
  theme(
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 10),
    panel.background = element_blank()
  )

ggplotly(p)
```